From 456642c9091ce62e2920a44b0cda53dd841c8a4f Mon Sep 17 00:00:00 2001 From: "mwilli2@equilibrium.research.intel-research.net" Date: Fri, 26 Mar 2004 18:32:29 +0000 Subject: [PATCH] bitkeeper revision 1.825.3.7 (4064773d4Vkaf0WFguSCpOO7O0qqEQ) Add Atropos code and update control interface. --- .rootkeys | 1 + tools/xc/lib/xc.h | 20 + tools/xc/lib/xc_atropos.c | 37 +- tools/xc/lib/xc_bvtsched.c | 48 +- tools/xc/lib/xc_misc.c | 17 + tools/xc/lib/xc_rrobin.c | 20 +- tools/xc/py/Xc.c | 150 ++++++- xen/common/dom0_ops.c | 11 +- xen/common/keyhandler.c | 32 +- xen/common/sched_atropos.c | 598 +++++++++++++++++++++++++ xen/common/sched_bvt.c | 65 ++- xen/common/sched_rrobin.c | 12 +- xen/common/schedule.c | 122 +++-- xen/include/hypervisor-ifs/dom0_ops.h | 14 +- xen/include/hypervisor-ifs/sched_ctl.h | 16 +- xen/include/xen/sched-if.h | 4 +- xen/include/xen/sched.h | 5 + 17 files changed, 1041 insertions(+), 131 deletions(-) create mode 100644 xen/common/sched_atropos.c diff --git a/.rootkeys b/.rootkeys index 03b625759b..007f0440b5 100644 --- a/.rootkeys +++ b/.rootkeys @@ -175,6 +175,7 @@ 4051bcecFeq4DE70p4zGO5setf47CA xen/common/physdev.c 4006e659i9j-doVxY7DKOGU4XVin1Q xen/common/rbtree.c 3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c +4064773cJ31vZt-zhbSoxqft1Jaw0w xen/common/sched_atropos.c 40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c 40589968be_t_n0-w6ggceW7h-sx0w xen/common/sched_rrobin.c 3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h index 1045be69d0..0abd00989e 100644 --- a/tools/xc/lib/xc.h +++ b/tools/xc/lib/xc.h @@ -74,6 +74,7 @@ int xc_netbsd_build(int xc_handle, int xc_bvtsched_global_set(int xc_handle, unsigned long ctx_allow); + int xc_bvtsched_domain_set(int xc_handle, u64 domid, unsigned long mcuadv, @@ -81,13 +82,32 @@ int xc_bvtsched_domain_set(int xc_handle, unsigned long warpl, unsigned long warpu); +int xc_bvtsched_global_get(int xc_handle, + unsigned long *ctx_allow); + +int 
xc_bvtsched_domain_get(int xc_handle, + u64 domid, + unsigned long *mcuadv, + unsigned long *warp, + unsigned long *warpl, + unsigned long *warpu); + int xc_atropos_domain_set(int xc_handle, u64 domid, + u64 period, u64 slice, u64 latency, int xtratime); +int xc_atropos_domain_get(int xc_handle, + u64 domid, + u64* period, u64 *slice, u64 *latency, + int *xtratime); + int xc_rrobin_global_set(int xc_handle, u64 slice); +int xc_rrobin_global_get(int xc_handle, + u64 *slice); + typedef struct { unsigned long credit_bytes; unsigned long credit_usec; diff --git a/tools/xc/lib/xc_atropos.c b/tools/xc/lib/xc_atropos.c index 06ba01cf32..3b4535d96d 100644 --- a/tools/xc/lib/xc_atropos.c +++ b/tools/xc/lib/xc_atropos.c @@ -8,29 +8,44 @@ #include "xc_private.h" -int xc_atropos_global_set(int xc_handle, - unsigned long ctx_allow) +int xc_atropos_domain_set(int xc_handle, + u64 domid, u64 period, u64 slice, u64 latency, + int xtratime) { dom0_op_t op; + struct atropos_adjdom *p = &op.u.adjustdom.u.atropos; - op.cmd = DOM0_SCHEDCTL; - op.u.schedctl.sched_id = SCHED_BVT; + op.cmd = DOM0_ADJUSTDOM; + op.u.adjustdom.domain = (domid_t)domid; + op.u.adjustdom.sched_id = SCHED_ATROPOS; + op.u.adjustdom.direction = SCHED_INFO_PUT; - op.u.schedctl.u.bvt.ctx_allow = ctx_allow; + p->period = period; + p->slice = slice; + p->latency = latency; + p->xtratime = xtratime; return do_dom0_op(xc_handle, &op); } -int xc_atropos_domain_set(int xc_handle, - u64 domid, int xtratime) +int xc_atropos_domain_get(int xc_handle, u64 domid, u64 *period, + u64 *slice, u64 *latency, int *xtratime) { dom0_op_t op; + int ret; + struct atropos_adjdom *p = &op.u.adjustdom.u.atropos; - op.cmd = DOM0_ADJUSTDOM; - op.u.adjustdom.domain = (domid_t)domid; + op.cmd = DOM0_ADJUSTDOM; + op.u.adjustdom.domain = (domid_t)domid; op.u.adjustdom.sched_id = SCHED_ATROPOS; + op.u.adjustdom.direction = SCHED_INFO_GET; - op.u.adjustdom.u.atropos.xtratime = xtratime; + ret = do_dom0_op(xc_handle, &op); - return 
do_dom0_op(xc_handle, &op); + *period = p->period; + *slice = p->slice; + *latency = p->latency; + *xtratime = p->xtratime; + + return ret; } diff --git a/tools/xc/lib/xc_bvtsched.c b/tools/xc/lib/xc_bvtsched.c index 428c2d6c32..e5106b561b 100644 --- a/tools/xc/lib/xc_bvtsched.c +++ b/tools/xc/lib/xc_bvtsched.c @@ -15,12 +15,29 @@ int xc_bvtsched_global_set(int xc_handle, op.cmd = DOM0_SCHEDCTL; op.u.schedctl.sched_id = SCHED_BVT; - + op.u.schedctl.direction = SCHED_INFO_PUT; op.u.schedctl.u.bvt.ctx_allow = ctx_allow; return do_dom0_op(xc_handle, &op); } +int xc_bvtsched_global_get(int xc_handle, + unsigned long *ctx_allow) +{ + dom0_op_t op; + int ret; + + op.cmd = DOM0_SCHEDCTL; + op.u.schedctl.sched_id = SCHED_BVT; + op.u.schedctl.direction = SCHED_INFO_GET; + + ret = do_dom0_op(xc_handle, &op); + + *ctx_allow = op.u.schedctl.u.bvt.ctx_allow; + + return ret; +} + int xc_bvtsched_domain_set(int xc_handle, u64 domid, unsigned long mcuadv, @@ -34,11 +51,38 @@ int xc_bvtsched_domain_set(int xc_handle, op.cmd = DOM0_ADJUSTDOM; op.u.adjustdom.domain = (domid_t)domid; op.u.adjustdom.sched_id = SCHED_BVT; + op.u.adjustdom.direction = SCHED_INFO_PUT; bvtadj->mcu_adv = mcuadv; bvtadj->warp = warp; bvtadj->warpl = warpl; bvtadj->warpu = warpu; - return do_dom0_op(xc_handle, &op); } + + +int xc_bvtsched_domain_get(int xc_handle, + u64 domid, + unsigned long *mcuadv, + unsigned long *warp, + unsigned long *warpl, + unsigned long *warpu) +{ + + dom0_op_t op; + int ret; + struct bvt_adjdom *adjptr = &op.u.adjustdom.u.bvt; + + op.cmd = DOM0_ADJUSTDOM; + op.u.adjustdom.domain = (domid_t)domid; + op.u.adjustdom.sched_id = SCHED_BVT; + op.u.adjustdom.direction = SCHED_INFO_GET; + + ret = do_dom0_op(xc_handle, &op); + + *mcuadv = adjptr->mcu_adv; + *warp = adjptr->warp; + *warpl = adjptr->warpl; + *warpu = adjptr->warpu; + return ret; +} diff --git a/tools/xc/lib/xc_misc.c b/tools/xc/lib/xc_misc.c index 15fcead97c..9f087d56fb 100644 --- a/tools/xc/lib/xc_misc.c +++ 
b/tools/xc/lib/xc_misc.c @@ -68,3 +68,20 @@ int xc_physinfo(int xc_handle, return 0; } + +int xc_sched_id(int xc_handle, + int *sched_id) +{ + int ret; + dom0_op_t op; + + op.cmd = DOM0_SCHED_ID; + op.interface_version = DOM0_INTERFACE_VERSION; + + if((ret = do_dom0_op(xc_handle, &op))) return ret; + + *sched_id = op.u.sched_id.sched_id; + + return 0; +} + diff --git a/tools/xc/lib/xc_rrobin.c b/tools/xc/lib/xc_rrobin.c index c915508050..ad37962f3b 100644 --- a/tools/xc/lib/xc_rrobin.c +++ b/tools/xc/lib/xc_rrobin.c @@ -11,11 +11,27 @@ int xc_rrobin_global_set(int xc_handle, u64 slice) { dom0_op_t op; - op.cmd = DOM0_SCHEDCTL; op.u.schedctl.sched_id = SCHED_RROBIN; + op.u.schedctl.direction = SCHED_INFO_PUT; op.u.schedctl.u.rrobin.slice = slice; - return do_dom0_op(xc_handle, &op); } + + +int xc_rrobin_global_get(int xc_handle, u64 *slice) +{ + dom0_op_t op; + int ret; + + op.cmd = DOM0_SCHEDCTL; + op.u.schedctl.sched_id = SCHED_RROBIN; + op.u.schedctl.direction = SCHED_INFO_GET; + + ret = do_dom0_op(xc_handle, &op); + + *slice = op.u.schedctl.u.rrobin.slice; + + return ret; +} diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c index 96b9bf491a..3cbbe7efa6 100644 --- a/tools/xc/py/Xc.c +++ b/tools/xc/py/Xc.c @@ -281,6 +281,23 @@ static PyObject *pyxc_bvtsched_global_set(PyObject *self, return zero; } +static PyObject *pyxc_bvtsched_global_get(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + + unsigned long ctx_allow; + + if ( !PyArg_ParseTuple(args, "") ) + return NULL; + + if ( xc_bvtsched_global_get(xc->xc_handle, &ctx_allow) != 0 ) + return PyErr_SetFromErrno(xc_error); + + return Py_BuildValue("{s:l}", "ctx_allow", ctx_allow); /* braces build the dict the method docstring promises; without them a tuple is returned */ +} + static PyObject *pyxc_bvtsched_domain_set(PyObject *self, PyObject *args, PyObject *kwds) @@ -305,6 +322,31 @@ static PyObject *pyxc_bvtsched_domain_set(PyObject *self, return zero; } +static PyObject *pyxc_bvtsched_domain_get(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc 
= (XcObject *)self; + u64 dom; + unsigned long mcuadv, warp, warpl, warpu; + + static char *kwd_list[] = { "dom", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "L", kwd_list, &dom) ) + return NULL; + + if ( xc_bvtsched_domain_get(xc->xc_handle, dom, &mcuadv, &warp, + &warpl, &warpu) != 0 ) + return PyErr_SetFromErrno(xc_error); + + return Py_BuildValue("{s:L,s:l,s:l,s:l,s:l}", + "domain", dom, + "mcuadv", mcuadv, + "warp", warp, + "warpl", warpl, + "warpu", warpu); +} + static PyObject *pyxc_vif_scheduler_set(PyObject *self, PyObject *args, PyObject *kwds) @@ -879,22 +921,52 @@ static PyObject *pyxc_atropos_domain_set(PyObject *self, PyObject *kwds) { XcObject *xc = (XcObject *)self; - int xtratime; u64 domid; + u64 period, slice, latency; + int xtratime; - static char *kwd_list[] = { "dom", "xtratime", NULL }; + static char *kwd_list[] = { "dom", "period", "slice", "latency", + "xtratime", NULL }; - if( !PyArg_ParseTupleAndKeywords(args, kwds, "Li", kwd_list, &domid, - &xtratime) ) + if( !PyArg_ParseTupleAndKeywords(args, kwds, "LLLLi", kwd_list, &domid, + &period, &slice, &latency, &xtratime) ) return NULL; - if ( xc_atropos_domain_set(xc->xc_handle, domid, xtratime) != 0 ) + if ( xc_atropos_domain_set(xc->xc_handle, domid, period, slice, + latency, xtratime) != 0 ) return PyErr_SetFromErrno(xc_error); Py_INCREF(zero); return zero; } +static PyObject *pyxc_atropos_domain_get(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + u64 domid; + u64 period, slice, latency; + int xtratime; + + static char *kwd_list[] = { "dom", NULL }; + + if( !PyArg_ParseTupleAndKeywords(args, kwds, "L", kwd_list, &domid) ) + return NULL; + + if ( xc_atropos_domain_get( xc->xc_handle, domid, &period, + &slice, &latency, &xtratime ) ) + return PyErr_SetFromErrno(xc_error); + + return Py_BuildValue("{s:L,s:L,s:L,s:L,s:i}", + "domain", domid, + "period", period, + "slice", slice, + "latency", latency, + "xtratime", xtratime); +} + + 
static PyObject *pyxc_rrobin_global_set(PyObject *self, PyObject *args, PyObject *kwds) @@ -914,6 +986,22 @@ static PyObject *pyxc_rrobin_global_set(PyObject *self, return zero; } +static PyObject *pyxc_rrobin_global_get(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + u64 slice; + + if ( !PyArg_ParseTuple(args, "") ) + return NULL; + + if ( xc_rrobin_global_get(xc->xc_handle, &slice) != 0 ) + return PyErr_SetFromErrno(xc_error); + + return Py_BuildValue("{s:L}", "slice", slice); /* braces build a dict; the unbraced format yields a ("slice", value) tuple */ +} + static PyMethodDef pyxc_methods[] = { { "domain_create", @@ -1015,6 +1103,13 @@ static PyMethodDef pyxc_methods[] = { " ctx_allow [int]: Minimal guaranteed quantum (I think!).\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, + { "bvtsched_global_get", + (PyCFunction)pyxc_bvtsched_global_get, + METH_KEYWORDS, "\n" + "Get global tuning parameters for BVT scheduler.\n" + "Returns: [dict]:\n" + " ctx_allow [int]: context switch allowance\n" }, + { "bvtsched_domain_set", (PyCFunction)pyxc_bvtsched_domain_set, METH_VARARGS | METH_KEYWORDS, "\n" @@ -1026,21 +1121,56 @@ static PyMethodDef pyxc_methods[] = { " warpu [int]: Internal BVT parameter.\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, + { "bvtsched_domain_get", + (PyCFunction)pyxc_bvtsched_domain_get, + METH_KEYWORDS, "\n" + "Get per-domain tuning parameters under the BVT scheduler.\n" + " dom [long]: Identifier of domain to be queried.\n" + "Returns [dict]:\n" + " domain [long]: Domain ID.\n" + " mcuadv [long]: MCU Advance.\n" + " warp [long]: Warp.\n" + " warpu [long]:\n" + " warpl [long]: Warp limit,\n" + }, + { "atropos_domain_set", (PyCFunction)pyxc_atropos_domain_set, - METH_VARARGS | METH_KEYWORDS, "\n" - "Set the extra time flag for a domain when running with Atropos.\n" - " dom [long]: domain to set\n" + METH_KEYWORDS, "\n" + "Set the scheduling parameters for a domain when running with Atropos.\n" + " dom [long]: domain to set\n" + " period [long]: domain's scheduling 
period\n" + " slice [long]: domain's slice per period\n" + " latency [long]: wakeup latency hint\n" " xtratime [int]: boolean\n" "Returns: [int] 0 on success; -1 on error.\n" }, + { "atropos_domain_get", + (PyCFunction)pyxc_atropos_domain_get, + METH_KEYWORDS, "\n" + "Get the current scheduling parameters for a domain when running with\n" + "the Atropos scheduler." + " dom [long]: domain to query\n" + "Returns: [dict]\n" + " domain [long]: domain ID\n" + " period [long]: scheduler period\n" + " slice [long]: CPU reservation per period\n" + " latency [long]: unblocking latency hint\n" + " xtratime [int] : 0 if not using slack time, nonzero otherwise\n" }, + { "rrobin_global_set", (PyCFunction)pyxc_rrobin_global_set, METH_KEYWORDS, "\n" "Set Round Robin scheduler slice.\n" " slice [long]: Round Robin scheduler slice\n" - "Returns: [int] 0 on success, throws an exception on failure\n" - }, + "Returns: [int] 0 on success, throws an exception on failure\n" }, + + { "rrobin_global_get", + (PyCFunction)pyxc_rrobin_global_get, + METH_KEYWORDS, "\n" + "Get Round Robin scheduler settings\n" + "Returns [dict]:\n" + " slice [long]: Scheduler time slice.\n" }, { "vif_scheduler_set", (PyCFunction)pyxc_vif_scheduler_set, diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c index a5f7041f0b..4c1a3cdefd 100644 --- a/xen/common/dom0_ops.c +++ b/xen/common/dom0_ops.c @@ -200,12 +200,14 @@ long do_dom0_op(dom0_op_t *u_dom0_op) case DOM0_SCHEDCTL: { ret = sched_ctl(&op->u.schedctl); + copy_to_user(u_dom0_op, op, sizeof(*op)); } break; case DOM0_ADJUSTDOM: { ret = sched_adjdom(&op->u.adjustdom); + copy_to_user(u_dom0_op, op, sizeof(*op)); } break; @@ -275,7 +277,6 @@ long do_dom0_op(dom0_op_t *u_dom0_op) if ( (p->state == TASK_STOPPED) || (p->state == TASK_DYING) ) op->u.getdomaininfo.state = DOMSTATE_STOPPED; op->u.getdomaininfo.hyp_events = p->hyp_events; -// op->u.getdomaininfo.mcu_advance = p->mcu_advance; op->u.getdomaininfo.tot_pages = p->tot_pages; 
op->u.getdomaininfo.cpu_time = p->cpu_time; op->u.getdomaininfo.shared_info_frame = @@ -485,6 +486,15 @@ long do_dom0_op(dom0_op_t *u_dom0_op) op->u.pcidev_access.enable); } break; + + case DOM0_SCHED_ID: + { + op->u.sched_id.sched_id = sched_id(); + + copy_to_user(u_dom0_op, op, sizeof(*op)); + ret = 0; + } + break; /* without this the case falls through to default and ret becomes -ENOSYS */ default: ret = -ENOSYS; diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c index e2eed7a85c..734df5cffa 100644 --- a/xen/common/keyhandler.c +++ b/xen/common/keyhandler.c @@ -4,6 +4,7 @@ #include #include #include +#include #define KEY_MAX 256 #define STR_MAX 64 @@ -74,29 +75,6 @@ static void kill_dom0(u_char key, void *dev_id, struct pt_regs *regs) kill_other_domain(0, 0); } - -/* XXX SMH: this is keir's fault */ -static char *task_states[] = -{ - "Runnable ", - "Int Sleep ", - "UInt Sleep", - NULL, - "Stopped ", - NULL, - NULL, - NULL, - "Dying ", - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - "Sched priv" -}; - void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs) { unsigned long flags; @@ -111,10 +89,10 @@ void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs) for_each_domain ( p ) { - printk("Xen: DOM %llu, CPU %d [has=%c], state = %s, " - "hyp_events = %08x\n", - p->domain, p->processor, p->has_cpu ? 'T':'F', - task_states[p->state], p->hyp_events); + printk("Xen: DOM %llu, CPU %d [has=%c], state = ", + p->domain, p->processor, p->has_cpu ? 'T':'F'); + sched_prn_state(p ->state); + printk(", hyp_events = %08x\n", p->hyp_events); s = p->shared_info; printk("Guest: upcall_pend = %08lx, upcall_mask = %08lx\n", s->evtchn_upcall_pending, s->evtchn_upcall_mask); diff --git a/xen/common/sched_atropos.c b/xen/common/sched_atropos.c new file mode 100644 index 0000000000..1a5fd792aa --- /dev/null +++ b/xen/common/sched_atropos.c @@ -0,0 +1,598 @@ +/* + * atropos.c + * --------- + * + * Copyright (c) 1994 University of Cambridge Computer Laboratory. 
+ * This is part of Nemesis; consult your contract for terms and conditions. + * + * ID : $Id: atropos.c 1.1 Tue, 13 Apr 1999 13:30:49 +0100 dr10009 $ + * + * This is the "atropos" CPU scheduler. + */ + +/* Ported to Xen's generic scheduler interface by Mark Williamson + * these modifications are (C) 2004 Intel Research Cambridge + */ + +#include +#include +#include +#include +#include + +#define ATROPOS_TASK_UNBLOCKED 16 +#define ATROPOS_TASK_WAIT 32 + +#define Activation_Reason_Allocated 1 +#define Activation_Reason_Preempted 2 +#define Activation_Reason_Extra 3 + +/* The following will be used for atropos-specific per-domain data fields */ +struct at_dom_info +{ + /* MAW Xen additions */ + struct task_struct *owner; /* the struct task_struct this data belongs to */ + struct list_head waitq; /* wait queue */ + int reason; /* reason domain was last scheduled */ + + /* (what remains of) the original fields */ + + s_time_t deadline; /* Next deadline */ + s_time_t prevddln; /* Previous deadline */ + + s_time_t remain; /* Time remaining this period */ + s_time_t period; /* Period of time allocation */ + s_time_t slice; /* Length of allocation */ + s_time_t latency; /* Unblocking latency */ + + int xtratime; /* Prepared to accept extra? 
*/ +}; + + +struct at_cpu_info +{ + struct list_head waitq; /* wait queue*/ +}; + + +#define DOM_INFO(_p) ( (struct at_dom_info *)((_p)->sched_priv) ) +#define CPU_INF(_p) ( (struct at_cpu_info *)((_p).sched_priv) ) +#define WAITQ(cpu) (&( CPU_INF(schedule_data[cpu]) )->waitq ) +#define RUNQ(cpu) (&schedule_data[cpu].runqueue) + +#define BESTEFFORT_QUANTUM MILLISECS(5) + +/* SLAB cache for struct at_dom_info objects */ +static kmem_cache_t *dom_info_cache; + +/** calculate the length of a linked list */ +static int q_len(struct list_head *q) +{ + int i = 0; + struct list_head *tmp; + list_for_each(tmp, q) i++; + return i; +} + + +/** waitq_el - get the task_struct that owns a wait queue list element */ +static inline struct task_struct * waitq_el(struct list_head *l) +{ + struct at_dom_info *inf; + inf = list_entry(l, struct at_dom_info, waitq); + return inf->owner; +} + + +/* + * requeue + * + * Places the specified domain on the appropriate queue. + * The wait queue is ordered by the time at which the domain + * will receive more CPU time. If a domain has no guaranteed time + * left then the domain will be placed on the WAIT queue until + * its next period. + * + * Note that domains can be on the wait queue with remain > 0 + * as a result of being blocked for a short time. + * These are scheduled in preference to domains with remain < 0 + * in an attempt to improve interactive performance. 
+ */ +static void requeue(struct task_struct *sdom) +{ + struct at_dom_info *inf = DOM_INFO(sdom); + struct list_head *prev = WAITQ(sdom->processor); + struct list_head *next; + + if(sdom->state == ATROPOS_TASK_WAIT || + sdom->state == ATROPOS_TASK_UNBLOCKED ) + { + /* insert into ordered wait queue */ + + prev = WAITQ(sdom->processor); + list_for_each(next, WAITQ(sdom->processor)) + { + struct at_dom_info *i = list_entry(next, struct at_dom_info, waitq); + if( i->deadline > inf->deadline ) + { + __list_add(&inf->waitq, prev, next); + break; + } + + prev = next; + } + + /* put the domain on the end of the list if it hasn't been put + * elsewhere */ + if ( next == WAITQ(sdom->processor)) + list_add_tail(&inf->waitq, WAITQ(sdom->processor)); + } + else if(sdom->state == TASK_RUNNING) + { + /* insert into ordered run queue */ + prev = RUNQ(sdom->processor); + + list_for_each(next, RUNQ(sdom->processor)) + { + struct task_struct *p = list_entry(next, struct task_struct, + run_list); + + if( DOM_INFO(p)->deadline > inf->deadline || is_idle_task(p) ) + { + __list_add(&sdom->run_list, prev, next); + break; + } + + prev = next; + } + + if ( next == RUNQ(sdom->processor) ) + list_add_tail(&sdom->run_list, RUNQ(sdom->processor)); + } + /* silently ignore tasks in other states like BLOCKED, DYING, STOPPED, etc + * - they shouldn't be on any queue */ +} + +/* prepare a task to be added to scheduling */ +static void at_add_task(struct task_struct *p) +{ + s_time_t now = NOW(); + + ASSERT( p->sched_priv != NULL ); + + DOM_INFO(p)->owner = p; + p->lastschd = now; + + if(is_idle_task(p)) + DOM_INFO(p)->slice = MILLISECS(5); + + /* DOM 0's scheduling parameters must be set here in order for it to boot + * the system! 
*/ + if(p->domain == 0) + { + DOM_INFO(p)->remain = MILLISECS(15); + DOM_INFO(p)->period = MILLISECS(20); + DOM_INFO(p)->slice = MILLISECS(15); + DOM_INFO(p)->latency = MILLISECS(10); + DOM_INFO(p)->xtratime = 1; + DOM_INFO(p)->deadline = now; + DOM_INFO(p)->prevddln = now; + } + else /* other domains run basically best effort unless otherwise set */ + { + DOM_INFO(p)->remain = 0; + DOM_INFO(p)->period = MILLISECS(10000); + DOM_INFO(p)->slice = MILLISECS(10); + DOM_INFO(p)->latency = MILLISECS(10000); + DOM_INFO(p)->xtratime = 1; + DOM_INFO(p)->deadline = now + MILLISECS(10000); + DOM_INFO(p)->prevddln = 0; + } + + INIT_LIST_HEAD(&(DOM_INFO(p)->waitq)); +} + + +/** + * dequeue - remove a domain from any queues it is on. + * @sdom: the task to remove + */ +static void dequeue(struct task_struct *sdom) +{ + struct at_dom_info *inf = DOM_INFO(sdom); + + ASSERT(sdom->domain != IDLE_DOMAIN_ID); + + /* just delete it from all the queues! */ + list_del(&inf->waitq); + INIT_LIST_HEAD(&inf->waitq); + + if(__task_on_runqueue(sdom)) + __del_from_runqueue(sdom); + + sdom->run_list.next = NULL; + sdom->run_list.prev = NULL; + +} + + +/* + * unblock + * + * This function deals with updating the sdom for a domain + * which has just been unblocked. + * + * ASSERT: On entry, the sdom has already been removed from the block + * queue (it can be done more efficiently if we know that it + * is on the head of the queue) but its deadline field has not been + * restored yet. + */ +static void unblock(struct task_struct *sdom) +{ + s_time_t time = NOW(); + struct at_dom_info *inf = DOM_INFO(sdom); + + dequeue(sdom); + + /* We distinguish two cases... short and long blocks */ + if ( inf->deadline < time ) { + /* The sdom has passed its deadline since it was blocked. + Give it its new deadline based on the latency value. 
*/ + inf->prevddln = time; + inf->deadline = time + inf->latency; + inf->remain = inf->slice; + if(inf->remain > 0) + sdom->state = TASK_RUNNING; + else + sdom->state = ATROPOS_TASK_WAIT; + + } else { + /* We leave REMAIN intact, but put this domain on the WAIT + queue marked as recently unblocked. It will be given + priority over other domains on the wait queue until while + REMAIN>0 in a generous attempt to help it make up for its + own foolishness. */ + if(inf->remain > 0) + sdom->state = ATROPOS_TASK_UNBLOCKED; + else + sdom->state = ATROPOS_TASK_WAIT; + } + + requeue(sdom); + +} + +/** + * ATROPOS - main scheduler function + */ +task_slice_t ksched_scheduler(s_time_t time) +{ + struct task_struct *cur_sdom = current; /* Current sdom */ + s_time_t newtime; + s_time_t ranfor; /* How long the domain ran */ + struct task_struct *sdom; /* tmp. scheduling domain */ + int reason; /* reason for reschedule */ + int cpu = cur_sdom->processor; /* current CPU */ + struct at_dom_info *cur_info; + static unsigned long waitq_rrobin = 0; + int i; + task_slice_t ret; + + cur_info = DOM_INFO(cur_sdom); + + ASSERT( cur_sdom != NULL); + + /* If we were spinning in the idle loop, there is no current + * domain to deschedule. */ + if (is_idle_task(cur_sdom)) { + goto deschedule_done; + } + + /***************************** + * + * Deschedule the current scheduling domain + * + ****************************/ + + /* Record the time the domain was preempted and for how long it + ran. Work out if the domain is going to be blocked to save + some pointless queue shuffling */ + cur_sdom->lastdeschd = time; + + ranfor = (time - cur_sdom->lastschd); + + dequeue(cur_sdom); + + if ((cur_sdom->state == TASK_RUNNING) || + (cur_sdom->state == ATROPOS_TASK_UNBLOCKED)) { + + /* In this block, we are doing accounting for an sdom which has + been running in contracted time. Note that this could now happen + even if the domain is on the wait queue (i.e. 
if it blocked) */ + + /* Deduct guaranteed time from the domain */ + cur_info->remain -= ranfor; + + /* If guaranteed time has run out... */ + if ( cur_info->remain <= 0 ) { + /* Move domain to correct position in WAIT queue */ + /* XXX sdom_unblocked doesn't need this since it is + already in the correct place. */ + cur_sdom->state = ATROPOS_TASK_WAIT; + } + } + + requeue(cur_sdom); + + deschedule_done: + + /***************************** + * + * We have now successfully descheduled the current sdom. + * The next task is the allocate CPU time to any sdom it is due to. + * + ****************************/ + cur_sdom = NULL; + + /***************************** + * + * Allocate CPU time to any waiting domains who have passed their + * period deadline. If necessary, move them to run queue. + * + ****************************/ + while(!list_empty(WAITQ(cpu)) && + DOM_INFO(sdom = waitq_el(WAITQ(cpu)->next))->deadline <= time ) { + + struct at_dom_info *inf = DOM_INFO(sdom); + + dequeue(sdom); + + /* Domain begins a new period and receives a slice of CPU + * If this domain has been blocking then throw away the + * rest of it's remain - it can't be trusted */ + if (inf->remain > 0) + inf->remain = inf->slice; + else + inf->remain += inf->slice; + inf->prevddln = inf->deadline; + inf->deadline += inf->period; + if(inf->remain > 0) + sdom->state = TASK_RUNNING; + else + sdom->state = ATROPOS_TASK_WAIT; + + /* Place on the appropriate queue */ + requeue(sdom); + } + + /***************************** + * + * Next we need to pick an sdom to run. + * If anything is actually 'runnable', we run that. + * If nothing is, we pick a waiting sdom to run optimistically. + * If there aren't even any of those, we have to spin waiting for an + * event or a suitable time condition to happen. 
+ * + ****************************/ + + /* we guarantee there's always something on the runqueue */ + cur_sdom = list_entry(RUNQ(cpu)->next, + struct task_struct, run_list); + + cur_info = DOM_INFO(cur_sdom); + newtime = time + cur_info->remain; + reason = (cur_info->prevddln > cur_sdom->lastschd) ? + Activation_Reason_Allocated : Activation_Reason_Preempted; + + /* MAW - the idle domain is always on the run queue. We run from the + * runqueue if it's NOT the idle domain or if there's nothing on the wait + * queue */ + if (cur_sdom->domain == IDLE_DOMAIN_ID && !list_empty(WAITQ(cpu))) { + + struct list_head *item; + + /* Try running a domain on the WAIT queue - this part of the + scheduler isn't particularly efficient but then again, we + don't have any guaranteed domains to worry about. */ + + /* See if there are any unblocked domains on the WAIT + queue who we can give preferential treatment to. */ + list_for_each(item, WAITQ(cpu)) + { + struct at_dom_info *inf = + list_entry(item, struct at_dom_info, waitq); + + sdom = inf->owner; + + if (sdom->state == ATROPOS_TASK_UNBLOCKED) { + cur_sdom = sdom; + cur_info = inf; + newtime = time + inf->remain; + reason = Activation_Reason_Preempted; + goto found; + } + } + + /* init values needed to approximate round-robin for slack time */ + i = 0; + if ( waitq_rrobin >= q_len(WAITQ(cpu))) + waitq_rrobin = 0; + + /* Last chance: pick a domain on the wait queue with the XTRA + flag set. 
The NEXT_OPTM field is used to cheaply achieve + an approximation of round-robin order */ + list_for_each(item, WAITQ(cpu)) + { + struct at_dom_info *inf = + list_entry(item, struct at_dom_info, waitq); + + sdom = inf->owner; + + if (inf->xtratime && i >= waitq_rrobin) { + cur_sdom = sdom; + cur_info = inf; + newtime = time + BESTEFFORT_QUANTUM; + reason = Activation_Reason_Extra; + waitq_rrobin = i + 1; /* set this value ready for next */ + goto found; + } + + i++; + } + + } + + found: + /********************** + * + * We now have to work out the time when we next need to + * make a scheduling decision. We set the alarm timer + * to cause an interrupt at that time. + * + **********************/ + +#define MIN(x,y) ( ( x < y ) ? x : y ) +#define MAX(x,y) ( ( x > y ) ? x : y ) + + /* If we might be able to run a waiting domain before this one has */ + /* exhausted its time, cut short the time allocation */ + if (!list_empty(WAITQ(cpu))) + { + newtime = MIN(newtime, + DOM_INFO(waitq_el(WAITQ(cpu)->next))->deadline); + } + + /* don't allow pointlessly small time slices */ + newtime = MAX(newtime, time + BESTEFFORT_QUANTUM); + + ret.task = cur_sdom; + ret.time = newtime - time; + + cur_sdom->min_slice = newtime - time; + DOM_INFO(cur_sdom)->reason = reason; + + TRACE_2D(0, cur_sdom->domain >> 32, (u32)cur_sdom->domain); + + return ret; +} + + +/* set up some private data structures */ +static int at_init_scheduler() +{ + int i; + + for( i = 0; i < NR_CPUS; i++) + { + if( (CPU_INF(schedule_data[i]) = kmalloc(sizeof(struct at_cpu_info), + GFP_KERNEL)) == NULL ) + return -1; + WAITQ(i)->next = WAITQ(i); + WAITQ(i)->prev = WAITQ(i); + } + + dom_info_cache = kmem_cache_create("Atropos dom info", + sizeof(struct at_dom_info), + 0, 0, NULL, NULL); + + return 0; +} + +/* dump relevant per-cpu state for a run queue dump */ +static void at_dump_cpu_state(int cpu) +{ + printk("Waitq len: %d Runq len: %d ", + q_len(WAITQ(cpu)), + q_len(RUNQ(cpu))); +} + +/* print relevant 
per-domain info for a run queue dump */ +static void at_dump_runq_el(struct task_struct *p) +{ + printk("lastschd = %llu, xtratime = %d ", + p->lastschd, DOM_INFO(p)->xtratime); +} + + +/* set or fetch domain scheduling parameters */ +static int at_adjdom(struct task_struct *p, struct sched_adjdom_cmd *cmd) +{ + if ( cmd->direction == SCHED_INFO_PUT ) + { + DOM_INFO(p)->period = cmd->u.atropos.period; + DOM_INFO(p)->slice = cmd->u.atropos.slice; + DOM_INFO(p)->latency = cmd->u.atropos.latency; + DOM_INFO(p)->xtratime = !!cmd->u.atropos.xtratime; + } + else if ( cmd->direction == SCHED_INFO_GET ) + { + cmd->u.atropos.period = DOM_INFO(p)->period; + cmd->u.atropos.slice = DOM_INFO(p)->slice; + cmd->u.atropos.latency = DOM_INFO(p)->latency; + cmd->u.atropos.xtratime = DOM_INFO(p)->xtratime; + } + + return 0; +} + + +/** at_alloc_task - allocate private info for a task */ +static int at_alloc_task(struct task_struct *p) +{ + ASSERT(p != NULL); + + if( (DOM_INFO(p) = kmem_cache_alloc(dom_info_cache, GFP_KERNEL)) == NULL ) + return -1; + + if(p->domain == IDLE_DOMAIN_ID) + printk("ALLOC IDLE ON CPU %d\n", p->processor); + + memset(DOM_INFO(p), 0, sizeof(struct at_dom_info)); + + return 0; +} + + +/* free memory associated with a task */ +static void at_free_task(struct task_struct *p) +{ + kmem_cache_free( dom_info_cache, DOM_INFO(p) ); +} + +/* print decoded domain private state value (if known) */ +static int at_prn_state(int state) +{ + int ret = 0; + + switch(state) + { + case ATROPOS_TASK_UNBLOCKED: + printk("Unblocked"); + break; + case ATROPOS_TASK_WAIT: + printk("Wait"); + break; + default: + ret = -1; + } + + return ret; +} + + +struct scheduler sched_atropos_def = { + .name = "Atropos Soft Real Time Scheduler", + .opt_name = "atropos", + .sched_id = SCHED_ATROPOS, + + .init_scheduler = at_init_scheduler, + .alloc_task = at_alloc_task, + .add_task = at_add_task, + .free_task = at_free_task, + .wake_up = unblock, + .do_schedule = ksched_scheduler, + .adjdom = 
at_adjdom, + .dump_cpu_state = at_dump_cpu_state, + .dump_runq_el = at_dump_runq_el, + .prn_state = at_prn_state, +}; diff --git a/xen/common/sched_bvt.c b/xen/common/sched_bvt.c index 3052d1e98c..4e77d58948 100644 --- a/xen/common/sched_bvt.c +++ b/xen/common/sched_bvt.c @@ -151,6 +151,7 @@ void bvt_wake_up(struct task_struct *p) struct bvt_dom_info *inf = BVT_INFO(p); ASSERT(inf != NULL); + /* set the BVT parameters */ if (inf->avt < CPU_SVT(p->processor)) @@ -166,19 +167,25 @@ void bvt_wake_up(struct task_struct *p) /* * Block the currently-executing domain until a pertinent event occurs. */ -static long bvt_do_block(struct task_struct *p) +static void bvt_do_block(struct task_struct *p) { BVT_INFO(p)->warpback = 0; - return 0; } /* Control the scheduler. */ int bvt_ctl(struct sched_ctl_cmd *cmd) { struct bvt_ctl *params = &cmd->u.bvt; - - ctx_allow = params->ctx_allow; + if ( cmd->direction == SCHED_INFO_PUT ) + { + ctx_allow = params->ctx_allow; + } + else + { + params->ctx_allow = ctx_allow; + } + return 0; } @@ -187,24 +194,40 @@ int bvt_adjdom(struct task_struct *p, struct sched_adjdom_cmd *cmd) { struct bvt_adjdom *params = &cmd->u.bvt; - unsigned long mcu_adv = params->mcu_adv, - warp = params->warp, - warpl = params->warpl, - warpu = params->warpu; - - struct bvt_dom_info *inf = BVT_INFO(p); - - /* Sanity -- this can avoid divide-by-zero. */ - if ( mcu_adv == 0 ) - return -EINVAL; - - spin_lock_irq(&schedule_lock[p->processor]); - inf->mcu_advance = mcu_adv; - inf->warp = warp; - inf->warpl = warpl; - inf->warpu = warpu; - spin_unlock_irq(&schedule_lock[p->processor]); + unsigned long flags; + if ( cmd->direction == SCHED_INFO_PUT ) + { + unsigned long mcu_adv = params->mcu_adv, + warp = params->warp, + warpl = params->warpl, + warpu = params->warpu; + + struct bvt_dom_info *inf = BVT_INFO(p); + + /* Sanity -- this can avoid divide-by-zero. 
*/ + if ( mcu_adv == 0 ) + return -EINVAL; + + spin_lock_irqsave(&schedule_lock[p->processor], flags); + inf->mcu_advance = mcu_adv; + inf->warp = warp; + inf->warpl = warpl; + inf->warpu = warpu; + spin_unlock_irqrestore(&schedule_lock[p->processor], flags); + } + else if ( cmd->direction == SCHED_INFO_GET ) + { + struct bvt_dom_info *inf = BVT_INFO(p); + + spin_lock_irqsave(&schedule_lock[p->processor], flags); + params->mcu_adv = inf->mcu_advance; + params->warp = inf->warp; + params->warpl = inf->warpl; + params->warpu = inf->warpu; + spin_unlock_irqrestore(&schedule_lock[p->processor], flags); + } + return 0; } diff --git a/xen/common/sched_rrobin.c b/xen/common/sched_rrobin.c index 544803f1fa..73d73bf899 100644 --- a/xen/common/sched_rrobin.c +++ b/xen/common/sched_rrobin.c @@ -1,5 +1,5 @@ /**************************************************************************** - * Very stupid Round Robin Scheduler for Xen + * Round Robin Scheduler for Xen * * by Mark Williamson (C) 2004 Intel Research Cambridge */ @@ -33,7 +33,15 @@ static task_slice_t rr_do_schedule(s_time_t now) static int rr_ctl(struct sched_ctl_cmd *cmd) { - rr_slice = cmd->u.rrobin.slice; + if(cmd->direction == SCHED_INFO_PUT) + { + rr_slice = cmd->u.rrobin.slice; + } + else /* cmd->direction == SCHED_INFO_GET */ + { + cmd->u.rrobin.slice = rr_slice; + } + return 0; } diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 7b06d3a109..496b35b9a8 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -41,7 +41,7 @@ #define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */ /* - * XXX Pull trace-related #defines out of here and into an auto-generated + * TODO MAW pull trace-related #defines out of here and into an auto-generated * header file later on! 
*/ #define TRC_SCHED_DOM_ADD 0x00010000 @@ -68,23 +68,25 @@ static void t_timer_fn(unsigned long unused); static void dom_timer_fn(unsigned long data); static void fallback_timer_fn(unsigned long unused); -/* This is global for now so that private implementations can reach it. */ +/* This is global for now so that private implementations can reach it */ schedule_data_t schedule_data[NR_CPUS]; /* - * XXX It would be nice if the schedulers array could get populated + * TODO: It would be nice if the schedulers array could get populated * automagically without having to hack the code in here. */ -extern struct scheduler sched_bvt_def, sched_rrobin_def; +extern struct scheduler sched_bvt_def, sched_rrobin_def, sched_atropos_def; static struct scheduler *schedulers[] = { &sched_bvt_def, &sched_rrobin_def, + &sched_atropos_def, NULL}; /* Operations for the current scheduler. */ static struct scheduler ops; -#define SCHED_FN(fn, ...) \ - ((ops.fn != NULL) ? (ops.fn(__VA_ARGS__)) : (typeof(ops.fn(__VA_ARGS__)))0) +#define SCHED_OP(fn, ...) \ + (( ops.fn != NULL ) ? 
ops.fn( __VA_ARGS__ ) \ + : (typeof(ops.fn(__VA_ARGS__)))0 ) spinlock_t schedule_lock[NR_CPUS] __cacheline_aligned; @@ -101,7 +103,7 @@ extern kmem_cache_t *task_struct_cachep; void free_task_struct(struct task_struct *p) { - SCHED_FN(free_task, p); + SCHED_OP(free_task, p); kmem_cache_free(task_struct_cachep, p); } @@ -114,15 +116,15 @@ struct task_struct *alloc_task_struct(void) if ( (p = kmem_cache_alloc(task_struct_cachep,GFP_KERNEL)) == NULL ) return NULL; + + memset(p, 0, sizeof(*p)); - memset(p, 0, sizeof(*p)); - - if ( SCHED_FN(alloc_task, p) < 0) + if ( SCHED_OP(alloc_task, p) < 0 ) { - kmem_cache_free(task_struct_cachep, p); + kmem_cache_free(task_struct_cachep,p); return NULL; } - + return p; } @@ -146,7 +148,7 @@ void sched_add_domain(struct task_struct *p) schedule_data[p->processor].idle = p; } - SCHED_FN(add_task, p); + SCHED_OP(add_task, p); TRACE_3D(TRC_SCHED_DOM_ADD, _HIGH32(p->domain), _LOW32(p->domain), p); } @@ -160,7 +162,7 @@ int sched_rem_domain(struct task_struct *p) rem_ac_timer(&p->timer); - SCHED_FN(rem_task, p); + SCHED_OP(rem_task, p); TRACE_3D(TRC_SCHED_DOM_REM, _HIGH32(p->domain), _LOW32(p->domain), p); @@ -173,9 +175,9 @@ void init_idle_task(void) unsigned long flags; struct task_struct *p = current; - if ( SCHED_FN(alloc_task, p) < 0 ) - panic("Failed to allocate scheduler private data for idle task"); - SCHED_FN(add_task, p); + if ( SCHED_OP(alloc_task, p) < 0) + panic("Failed to allocate scheduler private data for idle task"); + SCHED_OP(add_task, p); spin_lock_irqsave(&schedule_lock[p->processor], flags); p->has_cpu = 1; @@ -191,12 +193,12 @@ void __wake_up(struct task_struct *p) ASSERT(p->state != TASK_DYING); - if ( unlikely(__task_on_runqueue(p)) ) + if ( unlikely(__task_on_runqueue(p)) ) return; p->state = TASK_RUNNING; - SCHED_FN(wake_up, p); + SCHED_OP(wake_up, p); #ifdef WAKEUP_HISTO p->wokenup = NOW(); @@ -300,15 +302,12 @@ long do_set_timer_op(unsigned long timeout_hi, unsigned long timeout_lo) return 0; } +/** sched_id 
- fetch ID of current scheduler */ +int sched_id() +{ + return ops.sched_id; +} -/** - * sched_ctl - dispatch a scheduler control operation - * @cmd: the command passed in the dom0 op - * - * Given a generic scheduler control operation, call the control function for - * the scheduler in use, passing the appropriate control information from the - * union supplied. - */ long sched_ctl(struct sched_ctl_cmd *cmd) { TRACE_0D(TRC_SCHED_CTL); @@ -316,7 +315,7 @@ long sched_ctl(struct sched_ctl_cmd *cmd) if ( cmd->sched_id != ops.sched_id ) return -EINVAL; - return SCHED_FN(control, cmd); + return SCHED_OP(control, cmd); } @@ -328,6 +327,9 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd) if ( cmd->sched_id != ops.sched_id ) return -EINVAL; + if ( cmd->direction != SCHED_INFO_PUT && cmd->direction != SCHED_INFO_GET ) + return -EINVAL; + p = find_domain_by_id(cmd->domain); if( p == NULL ) @@ -335,7 +337,7 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd) TRACE_2D(TRC_SCHED_ADJDOM, _HIGH32(p->domain), _LOW32(p->domain)); - SCHED_FN(adjdom, p, cmd); + SCHED_OP(adjdom, p, cmd); put_task_struct(p); return 0; @@ -351,7 +353,7 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd) */ unsigned long __reschedule(struct task_struct *p) { - int cpu = p->processor; + int cpu = p->processor; struct task_struct *curr; s_time_t now, min_time; @@ -376,7 +378,7 @@ unsigned long __reschedule(struct task_struct *p) if ( schedule_data[cpu].s_timer.expires > min_time + TIME_SLOP ) mod_ac_timer(&schedule_data[cpu].s_timer, min_time); - return SCHED_FN(reschedule, p); + return SCHED_OP(reschedule, p); } void reschedule(struct task_struct *p) @@ -385,6 +387,7 @@ void reschedule(struct task_struct *p) spin_lock_irqsave(&schedule_lock[p->processor], flags); cpu_mask = __reschedule(p); + spin_unlock_irqrestore(&schedule_lock[p->processor], flags); #ifdef CONFIG_SMP @@ -420,7 +423,6 @@ asmlinkage void __enter_scheduler(void) ASSERT(!in_interrupt()); ASSERT(__task_on_runqueue(prev)); ASSERT(prev->state 
!= TASK_UNINTERRUPTIBLE); - ASSERT(prev != NULL); if ( prev->state == TASK_INTERRUPTIBLE ) { @@ -428,19 +430,16 @@ asmlinkage void __enter_scheduler(void) if ( signal_pending(prev) ) prev->state = TASK_RUNNING; else - SCHED_FN(do_block, prev); + SCHED_OP(do_block, prev); } + prev->cpu_time += now - prev->lastschd; + /* get policy-specific decision on scheduling... */ next_slice = ops.do_schedule(now); r_time = next_slice.time; - next = next_slice.task; - - if ( likely(!is_idle_task(prev)) ) - prev->cpu_time += (now - prev->lastschd); - - /* now, switch to the new task... */ + next = next_slice.task; prev->has_cpu = 0; next->has_cpu = 1; @@ -484,8 +483,6 @@ asmlinkage void __enter_scheduler(void) TRACE_2D(TRC_SCHED_SWITCH, next->domain, next); - ASSERT(next->processor == current->processor); - switch_to(prev, next); if ( unlikely(prev->state == TASK_DYING) ) @@ -520,7 +517,6 @@ int idle_cpu(int cpu) static void s_timer_fn(unsigned long unused) { TRACE_0D(TRC_SCHED_S_TIMER_FN); - set_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events); perfc_incrc(sched_irq); } @@ -532,6 +528,8 @@ static void t_timer_fn(unsigned long unused) TRACE_0D(TRC_SCHED_T_TIMER_FN); + TRACE_0D(TRC_SCHED_T_TIMER_FN); + if ( !is_idle_task(p) ) send_guest_virq(p, VIRQ_TIMER); @@ -611,10 +609,8 @@ void __init scheduler_init(void) if ( ops.do_schedule == NULL) panic("Chosen scheduler has NULL do_schedule!"); - if ( SCHED_FN(init_scheduler) < 0 ) + if ( SCHED_OP(init_scheduler) < 0 ) panic("Initialising scheduler failed!"); - - SCHED_FN(add_task, &idle0_task); } /* @@ -654,7 +650,7 @@ static void dump_rqueue(struct list_head *queue, char *name) list_for_each (list, queue) { p = list_entry(list, struct task_struct, run_list); printk("%3d: %llu has=%c ", loop++, p->domain, p->has_cpu ? 
'T':'F'); - SCHED_FN(dump_runq_el, p); + SCHED_OP(dump_runq_el, p); printk("c=0x%X%08X\n", (u32)(p->cpu_time>>32), (u32)p->cpu_time); printk(" l: %lx n: %lx p: %lx\n", (unsigned long)list, (unsigned long)list->next, @@ -670,18 +666,48 @@ void dump_runq(u_char key, void *dev_id, struct pt_regs *regs) int i; printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name); - SCHED_FN(dump_settings); + SCHED_OP(dump_settings); printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now); for (i = 0; i < smp_num_cpus; i++) { spin_lock_irqsave(&schedule_lock[i], flags); printk("CPU[%02d] ", i); - SCHED_FN(dump_cpu_state,i); + SCHED_OP(dump_cpu_state,i); dump_rqueue(&schedule_data[i].runqueue, "rq"); spin_unlock_irqrestore(&schedule_lock[i], flags); } return; } +/* print human-readable "state", given the numeric code for that state */ +void sched_prn_state(int state) +{ + int ret = 0; + + switch(state) + { + case TASK_RUNNING: + printk("Running"); + break; + case TASK_INTERRUPTIBLE: + printk("Int sleep"); + break; + case TASK_UNINTERRUPTIBLE: + printk("UInt sleep"); + break; + case TASK_STOPPED: + printk("Stopped"); + break; + case TASK_DYING: + printk("Dying"); + break; + default: + ret = SCHED_OP(prn_state, state); + } + + if ( ret != 0 ) + printk("Unknown"); +} + #if defined(WAKEUP_HISTO) || defined(BLOCKTIME_HISTO) void print_sched_histo(u_char key, void *dev_id, struct pt_regs *regs) { diff --git a/xen/include/hypervisor-ifs/dom0_ops.h b/xen/include/hypervisor-ifs/dom0_ops.h index ce748d5d31..251f4853a4 100644 --- a/xen/include/hypervisor-ifs/dom0_ops.h +++ b/xen/include/hypervisor-ifs/dom0_ops.h @@ -18,7 +18,7 @@ * This makes sure that old versions of dom0 tools will stop working in a * well-defined way (rather than crashing the machine, for instance). 
*/ -#define DOM0_INTERFACE_VERSION 0xAAAA000A +#define DOM0_INTERFACE_VERSION 0xAAAA000B #define MAX_CMD_LEN 256 #define MAX_DOMAIN_NAME 16 @@ -96,7 +96,6 @@ typedef struct dom0_getdomaininfo_st #define DOMSTATE_STOPPED 1 int state; int hyp_events; - unsigned long mcu_advance; unsigned int tot_pages; long long cpu_time; unsigned long shared_info_frame; /* MFN of shared_info struct */ @@ -214,6 +213,16 @@ typedef struct dom0_pcidev_access_st int enable; } dom0_pcidev_access_t; +/* + * Get the ID of the current scheduler. + */ +#define DOM0_SCHED_ID 24 +typedef struct dom0_sched_id_st +{ + /* OUT variable */ + int sched_id; +} dom0_sched_id_t; + typedef struct dom0_op_st { unsigned long cmd; @@ -239,6 +248,7 @@ typedef struct dom0_op_st dom0_gettbufs_t gettbufs; dom0_physinfo_t physinfo; dom0_pcidev_access_t pcidev_access; + dom0_sched_id_t sched_id; } u; } dom0_op_t; diff --git a/xen/include/hypervisor-ifs/sched_ctl.h b/xen/include/hypervisor-ifs/sched_ctl.h index bd9e9d082c..a2e57c2b7c 100644 --- a/xen/include/hypervisor-ifs/sched_ctl.h +++ b/xen/include/hypervisor-ifs/sched_ctl.h @@ -7,18 +7,24 @@ #ifndef __SCHED_CTL_H__ #define __SCHED_CTL_H__ -/* Scheduler types. */ +/* Scheduler types */ #define SCHED_BVT 0 #define SCHED_ATROPOS 1 #define SCHED_RROBIN 2 +/* these describe the intended direction used for a scheduler control or domain + * command */ +#define SCHED_INFO_PUT 0 +#define SCHED_INFO_GET 1 + /* - * Generic scheduler control command: union of all scheduler control command - * structures. + * Generic scheduler control command - used to adjust system-wide scheduler + * parameters */ struct sched_ctl_cmd { unsigned int sched_id; + int direction; /* are we getting or putting settings? */ union { @@ -40,6 +46,7 @@ struct sched_adjdom_cmd { unsigned int sched_id; domid_t domain; + int direction; /* are we getting or putting settings? 
*/ union { @@ -53,6 +60,9 @@ struct sched_adjdom_cmd struct atropos_adjdom { + u64 period; + u64 slice; + u64 latency; int xtratime; } atropos; } u; diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h index 683e73d4f6..7e55f46b91 100644 --- a/xen/include/xen/sched-if.h +++ b/xen/include/xen/sched-if.h @@ -40,8 +40,7 @@ struct scheduler void (*free_task) (struct task_struct *); void (*rem_task) (struct task_struct *); void (*wake_up) (struct task_struct *); - /* XXX why does do_block need to return anything at all? */ - long (*do_block) (struct task_struct *); + void (*do_block) (struct task_struct *); task_slice_t (*do_schedule) (s_time_t); int (*control) (struct sched_ctl_cmd *); int (*adjdom) (struct task_struct *, @@ -50,6 +49,7 @@ struct scheduler void (*dump_settings) (void); void (*dump_cpu_state) (int); void (*dump_runq_el) (struct task_struct *); + int (*prn_state) (int); }; /* per CPU scheduler information */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index c12ac2ca5e..1b8bd10d3c 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -1,6 +1,9 @@ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H +#include +#include +#include #include #include #include @@ -266,6 +269,7 @@ void sched_add_domain(struct task_struct *p); int sched_rem_domain(struct task_struct *p); long sched_ctl(struct sched_ctl_cmd *); long sched_adjdom(struct sched_adjdom_cmd *); +int sched_id(); void init_idle_task(void); void __wake_up(struct task_struct *p); void wake_up(struct task_struct *p); @@ -302,6 +306,7 @@ void startup_cpu_idle_loop(void); void continue_cpu_idle_loop(void); void continue_nonidle_task(void); +void sched_prn_state(int state); /* This task_hash and task_list are protected by the tasklist_lock. */ #define TASK_HASH_SIZE 256 -- 2.30.2